{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "2afa3f1e",
   "metadata": {},
   "outputs": [],
   "source": [
    "#importing libraries\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import statsmodels.api as sm\n",
    "from scipy.stats.mstats import zscore\n",
    "import scipy.stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "1dd5903f",
   "metadata": {},
   "outputs": [],
   "source": [
    "# defining basic regression class\n",
    "class regreg:\n",
    "    \"\"\"Ordinary least squares regression helper that reports summary tables.\n",
    "\n",
    "    The outcome column is regressed on the predictor columns (an intercept is\n",
    "    always added). Three tables are available: model fit (`r2`), ANOVA with\n",
    "    the overall F-test (`fval`) and coefficients (`coeff`).\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    df : pandas.DataFrame\n",
    "        Data holding the predictor and outcome columns.\n",
    "    pred : list of str\n",
    "        Names of the predictor columns in `df`.\n",
    "    out : str\n",
    "        Name of the outcome column in `df`.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, df, pred, out):\n",
    "        self.df = df\n",
    "        self.pred = pred\n",
    "        self.out = out\n",
    "\n",
    "    def _fit(self):\n",
    "        \"\"\"Fit the OLS model on the data stored on this instance.\n",
    "\n",
    "        Returns\n",
    "        -------\n",
    "        tuple\n",
    "            `(results, X, y)`: the fitted statsmodels results, the design\n",
    "            matrix including the added constant, and the outcome column.\n",
    "        \"\"\"\n",
    "        # Read from self.df rather than a notebook-level `df`, so the object\n",
    "        # keeps working when that global is missing, renamed or reassigned.\n",
    "        X = sm.add_constant(self.df[self.pred])\n",
    "        y = self.df[self.out]\n",
    "        return sm.OLS(y, X).fit(), X, y\n",
    "\n",
    "    def r2(self):\n",
    "        \"\"\"Return a one-row table with R, R Square and Adjusted R Square (3 decimals).\"\"\"\n",
    "        results, _, _ = self._fit()\n",
    "        fit_summary = {\n",
    "            'Model': 1,\n",
    "            'R': round(np.sqrt(results.rsquared), 3),\n",
    "            'R Square': round(results.rsquared, 3),\n",
    "            'Adjusted R Square': round(results.rsquared_adj, 3),\n",
    "        }\n",
    "        return pd.DataFrame(fit_summary, [0])\n",
    "\n",
    "    def fval(self):\n",
    "        \"\"\"Return the ANOVA table (Regression / Residual / Total) with the overall F-test.\n",
    "\n",
    "        Every row reports the sum of squares, degrees of freedom and mean\n",
    "        square of the same source of variation. F and Sig. are shown on the\n",
    "        Regression row only and are blank strings on the other rows.\n",
    "        \"\"\"\n",
    "        results, _, _ = self._fit()\n",
    "        # Total df is df_model + df_resid (not the number of observations), so\n",
    "        # that MSE = Sum of Squares / df holds on the Total row as well.\n",
    "        total_df = results.df_model + results.df_resid\n",
    "        anova = {\n",
    "            'Model': 1,\n",
    "            'Reference': ['Regression', 'Residual', 'Total'],\n",
    "            # Explained, residual and total (centered) sums of squares, in row order.\n",
    "            'Sum of Squares': [round(results.ess, 3), round(results.ssr, 3), round(results.centered_tss, 3)],\n",
    "            'df': [results.df_model, results.df_resid, total_df],\n",
    "            'MSE': [results.mse_model, results.mse_resid, results.mse_total],\n",
    "            'F': [round(results.fvalue, 3), '', ''],\n",
    "            'Sig.': [round(results.f_pvalue, 4), '', ''],\n",
    "        }\n",
    "        return pd.DataFrame(anova, [0, 1, 2])\n",
    "\n",
    "    def coeff(self):\n",
    "        \"\"\"Return the coefficients table, one row per model term.\n",
    "\n",
    "        Columns: unstandardized `b`, `Std. Error`, standardized `beta` (a blank\n",
    "        string for the intercept), `t`, two-sided `p-value`, and the lower and\n",
    "        upper bounds returned by `conf_int()` with its default settings.\n",
    "        \"\"\"\n",
    "        results, X, y = self._fit()\n",
    "\n",
    "        table = pd.DataFrame(\n",
    "            {'b': results.params.round(3), 'Std. Error': results.bse.round(3)},\n",
    "            index=X.columns,\n",
    "        )\n",
    "\n",
    "        # Standardized betas: refit with predictors and outcome converted to z-scores.\n",
    "        z_pred = zscore(self.df[self.pred].to_numpy(dtype=float))\n",
    "        z_out = zscore(y.to_numpy(dtype=float))\n",
    "        z_params = np.asarray(sm.OLS(z_out, sm.add_constant(z_pred)).fit().params)\n",
    "        # add_constant puts the intercept first, so positions line up with X.columns.\n",
    "        beta = pd.Series(np.round(z_params, 3), index=X.columns, dtype=object)\n",
    "        beta['const'] = ' '  # no standardized coefficient is reported for the intercept\n",
    "        table['beta'] = beta\n",
    "\n",
    "        table['t'] = results.tvalues.round(3)\n",
    "        # Take the p-values from the fitted model instead of recomputing them\n",
    "        # from t statistics that were already rounded to three decimals.\n",
    "        table['p-value'] = results.pvalues.round(3)\n",
    "\n",
    "        ci = results.conf_int()\n",
    "        table['CI low'] = ci[0]\n",
    "        table['CI high'] = ci[1]\n",
    "        return table"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "fd436e4d",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the dataset from CSV into `df`.\n",
    "# NOTE: 'DatasetName.csv' looks like a placeholder -- point it at your own file.\n",
    "df = pd.read_csv('DatasetName.csv')\n",
    "# Preview the first two rows to check that the file was read as expected.\n",
    "df.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d0b84940",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the regression model: regreg(data, list of predictor columns, outcome column).\n",
    "# NOTE: 'x1 name', 'x2 name' and 'y name' appear to be placeholders for real column names in df.\n",
    "rr =regreg(df, ['x1 name', 'x2 name'], 'y name')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "19ff2772",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Model-fit table: R, R Square and Adjusted R Square (one row).\n",
    "rr.r2()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "77358f75",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ANOVA table (Regression / Residual / Total rows) with the overall F value and its significance.\n",
    "rr.fval()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a6b16ddb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Coefficients table: b, Std. Error, beta, t, p-value and confidence-interval bounds per term.\n",
    "rr.coeff()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ced34676",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
